﻿
#Region "Imports"

Imports System
Imports System.Data.OleDb
Imports System.Globalization
Imports System.Text
Imports Microsoft.VisualBasic.FileIO

#End Region

'********************************************************************************************************************
' Author:       Hiske Bekkering
' Date:         December 30th, 2016
' Description:  Acts as a data-flow source object for reading CSV files, allowing for dynamic column-mapping.
'********************************************************************************************************************
' The code in this class depends on there being the following package-level variables:
' • User::ColumnDelimiter (String):     -> The character to use to separate columns.
' • User::CurrentInputFile (String):    -> Name and path of the file being imported.
' • User::MappingsRecordSet (RecordSet):-> A recordset containing field mappings, retrieved in a previous task.
'
' The expected fields in the mappings recordset are:
' • FileColumnName (String):            -> The name of the column as expected in the file.
' • MapToOutPut (String):               -> The name of the output column to map the data to.
' • OutputDataTypeName (String):        -> The .Net equivalent of the column's data-type.
' • IsNullable (Boolean):               -> Indicates whether the column allows nulls.
'********************************************************************************************************************
' This class also uses the value of the component's LocaleID property as specified in the designer. The specified
' LocaleID is used when formatting date-time and numeric values, as well as for interpreting the decimal sign used in
' the files. If the locale does not match the decimal sign used in the files, floating-point output may be incorrect.
'********************************************************************************************************************
<Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute>
<CLSCompliant(False)>
Public Class ScriptMain
    Inherits UserComponent


#Region " Private Fields "

    Private _Filename As String
    Private _BufferTable As DataTable = Nothing
    Private _FormatProvider As CultureInfo
    Private _StartTime As DateTime

    ' The MAPPING_COUNT should always match the number of fields in the target table!
    ' And all of those fields should be handled in the CreateNewOutputRows method.
    Private Const MAPPING_COUNT As Integer = 6

#End Region


#Region " Public Methods "

    ''' <summary>
    ''' Copies every row of the buffer table to the component's output buffer,
    ''' reading each value by the mapped name of its column.
    ''' </summary>
    ''' <remarks>
    ''' Nothing is written when the buffer table is missing or does not hold exactly MAPPING_COUNT columns.
    ''' Any exception is reported to the package as an error instead of being rethrown.
    ''' </remarks>
    Public Overrides Sub CreateNewOutputRows()
        Dim written As Integer = 0
        Dim began As DateTime = DateTime.Now

        Try
            ' Bail out unless the buffer exists and has the expected shape
            If _BufferTable Is Nothing OrElse _BufferTable.Columns.Count <> MAPPING_COUNT Then
                Return
            End If

            ' Let the package know that writing is about to begin
            ComponentMetaData.FireInformation(10, ComponentMetaData.Name, "Starting to write formatted data.", "", 0, False)

            For Each bufferRow As DataRow In _BufferTable.Rows
                ' Every buffered row becomes exactly one output row
                Output0Buffer.AddRow()

                ' The columns are addressed by their mapped names here; the
                ' buffer columns are renamed to those names in method PreExecute.
                If Not bufferRow.IsNull("Channel") Then
                    Output0Buffer.Channel = bufferRow("Channel").ToString()
                Else
                    Output0Buffer.Channel_IsNull = True
                End If

                If Not bufferRow.IsNull("TimeBand") Then
                    Output0Buffer.TimeBand = bufferRow("TimeBand").ToString()
                Else
                    Output0Buffer.TimeBand_IsNull = True
                End If

                If Not bufferRow.IsNull("Column1") Then
                    Output0Buffer.Column1 = CDbl(bufferRow("Column1"))
                Else
                    Output0Buffer.Column1_IsNull = True
                End If

                If Not bufferRow.IsNull("Column2") Then
                    Output0Buffer.Column2 = CDbl(bufferRow("Column2"))
                Else
                    Output0Buffer.Column2_IsNull = True
                End If

                If Not bufferRow.IsNull("Column3") Then
                    Output0Buffer.Column3 = CDbl(bufferRow("Column3"))
                Else
                    Output0Buffer.Column3_IsNull = True
                End If

                If Not bufferRow.IsNull("Column4") Then
                    Output0Buffer.Column4 = CDbl(bufferRow("Column4"))
                Else
                    Output0Buffer.Column4_IsNull = True
                End If

                written += 1
            Next

            ' Summarize the number of rows written and the time it took
            Dim elapsed As TimeSpan = DateTime.Now - began
            Dim summary As String = String.Concat("Finished writing ", written.ToString("#,###,##0", _FormatProvider),
                                                  " rows of formatted data, imported from file ", _Filename,
                                                  ", taking ", elapsed.ToString("G", _FormatProvider), " to complete.")

            ComponentMetaData.FireInformation(10, ComponentMetaData.Name, summary, "", 0, False)

        Catch ex As Exception
            ' Surface the failure to the package
            ComponentMetaData.FireError(10, ComponentMetaData.Name, ex.Message, "", 0, False)
        End Try
    End Sub

    ''' <summary>
    ''' Reads the current input file into the buffer table and prepares
    ''' its contents to be processed downstream in the data flow.
    ''' </summary>
    ''' <remarks>
    ''' The first line of the file supplies the column names; every following line becomes one row in the buffer.
    ''' Each value is trimmed and converted to the data type of the buffer column carrying the same name, using the
    ''' culture identified by the component's LocaleID. A column that causes a conversion problem, or that has no
    ''' matching buffer column, is reported once as a warning and ignored from then on, while the remaining columns
    ''' on the same line are still processed. After reading, the buffer columns are renamed to their mapped output
    ''' names. Any other exception is reported to the package as an error and ends the method.
    ''' </remarks>
    Public Overrides Sub PreExecute()
        MyBase.PreExecute()

        Dim currentFile As String = Variables.CurrentInputFile
        Dim delimiter As String = Variables.ColumnDelimiter

        ' Names of the file columns that are being ignored. Names are compared as a whole, so
        ' ignoring a column named "Column10" does not cause a column named "Column1" to be ignored.
        Dim skippedColumns As New System.Collections.Generic.List(Of String)

        ' Record start
        _StartTime = Now

        ' Get the CultureInfo specified at design-time
        _FormatProvider = CultureInfo.GetCultureInfo(ComponentMetaData.LocaleID)

        ' Initialize buffer table; without valid mappings there is nothing to read into
        If Not BuildImportBuffer() Then
            Return
        End If

        ' Store the file's name for later use
        _Filename = FileSystem.GetName(currentFile)

        ' Report progress to the package
        ComponentMetaData.FireInformation(10, ComponentMetaData.Name, "Initializing the process to read the contents of file " & _Filename & ".", "", 0, False)

        Try
            ' Initialize a parser to read CSV data from the current file
            Using fileReader As TextFieldParser = New TextFieldParser(currentFile)
                fileReader.SetDelimiters(delimiter)

                ' Preserve white space, to allow for typos
                fileReader.TrimWhiteSpace = False

                ' Read column names from the first line in the file
                Dim fileColumns() As String = fileReader.ReadFields()

                ' Read remaining lines
                While Not fileReader.EndOfData
                    ' Read the values on the current line
                    Dim currentRowValues As String() = fileReader.ReadFields()

                    If currentRowValues IsNot Nothing Then
                        ' Create a new buffer row
                        Dim row As DataRow = _BufferTable.NewRow

                        ' Cycle the values on this line
                        For columnIndex As Int32 = 0 To currentRowValues.GetUpperBound(0)
                            ' Get the column's name as declared in the file
                            Dim columnName As String = fileColumns(columnIndex)

                            ' Set the value only if the column is not being skipped
                            If Not skippedColumns.Contains(columnName) Then
                                ' Problems are handled per column, so that a faulty
                                ' value does not discard the other values on this line.
                                Try
                                    ' Retrieve the DataType of the column at hand. If the column name as declared in the
                                    ' file is not found in the buffer table, this raises an exception that is handled below.
                                    Dim fieldDataType As System.Type = _BufferTable.Columns(columnName).DataType

                                    ' Store the value in the field on the new row in the buffer, converting it to the column's
                                    ' DataType using the component's LocaleID. The interpretation of the decimal sign depends
                                    ' on using the proper locale here. If the value violates any rules pertaining to the
                                    ' conversion (e.g. an empty string being cast to a numeric field), an exception is raised.
                                    row.Item(columnName) = Convert.ChangeType(currentRowValues(columnIndex).Trim(), fieldDataType, _FormatProvider)

                                Catch ex As Exception When TypeOf ex Is InvalidCastException OrElse TypeOf ex Is FormatException OrElse TypeOf ex Is OverflowException
                                    Dim messageText As String

                                    ' Make sure we ignore this column from now on
                                    skippedColumns.Add(columnName)

                                    ' Determine warning text
                                    If columnName.Length > 0 Then
                                        messageText = "There's a problem with data specified in a column with the name of [" & columnName &
                                                      "], in file " & _Filename & ". This column's contents will be ignored, but other columns will be imported when valid. Possible causes are absence of data, formatting issues or an overflow. The problem was: " & ex.Message
                                    Else
                                        messageText = "There's a problem with a column's data specified in file " & _Filename & ". Possible causes are absence of data, formatting issues or an overflow. The problem was: " & ex.Message
                                    End If

                                    ' Report warning to the package
                                    ComponentMetaData.FireWarning(10, ComponentMetaData.Name, messageText, "", 0)

                                Catch ex As Exception When TypeOf ex Is NullReferenceException OrElse TypeOf ex Is ArgumentException
                                    Dim messageText As String

                                    ' Make sure we ignore this column from now on
                                    skippedColumns.Add(columnName)

                                    ' Determine warning text
                                    If columnName.Length > 0 Then
                                        messageText = "There's a problem regarding a column with the name of [" & columnName & "], declared in file " & _Filename &
                                                      ". This column appears to be unmapped so its contents will be ignored, but other columns will be imported when valid. Verify that the fieldmappings are correct. The problem was: " & ex.Message
                                    Else
                                        messageText = "There's a problem with one of the columns in file " & _Filename & ". The problem was: " & ex.Message
                                    End If

                                    ' Report warning to the package
                                    ComponentMetaData.FireWarning(10, ComponentMetaData.Name, messageText, "", 0)
                                End Try
                            End If
                        Next

                        ' Add the row to the buffer
                        _BufferTable.Rows.Add(row)
                    End If
                End While
            End Using ' End Using fileReader

            ' Commit values to the buffer
            _BufferTable.AcceptChanges()

            ' Rename the columns to their mapped name, so that from now on, we
            ' can match columns to outputs. See the CreateNewOutputRows method
            For Each column As MappedDataColumn In _BufferTable.Columns
                column.ColumnName = column.MapToColumn
            Next

            ' Determine time spent and report progress
            Dim duration As TimeSpan = (Now - _StartTime)
            ComponentMetaData.FireInformation(10, ComponentMetaData.Name, "Finished reading " &
                                              _BufferTable.Rows.Count.ToString("#,###,##0", _FormatProvider) &
                                              " lines of raw data from file " & _Filename & ", taking " & duration.ToString("G", _FormatProvider) & " to complete.", "", 0, False)

        Catch Ex As Exception
            ' Report exception to the package. The ReadFields function might throw a MalformedLineException,
            ' but we'll catch anything here, to ensure proper reporting of the problem to the package.
            ComponentMetaData.FireError(10, ComponentMetaData.Name, Ex.Message, "", 0, False)
        End Try
    End Sub

    ''' <summary>
    ''' Runs after the component has finished executing: releases
    ''' the buffer table and reports the total time spent.
    ''' </summary>
    Public Overrides Sub PostExecute()
        MyBase.PostExecute()

        ' Release the buffer table, when there is one
        If Not _BufferTable Is Nothing Then
            _BufferTable.Clear()
            _BufferTable.Dispose()
        End If

        ' Report the time elapsed since the start that was recorded in PreExecute
        Dim elapsed As TimeSpan = DateTime.Now.Subtract(_StartTime)
        Dim finishedText As String = String.Concat("Finished reading and importing file ", _Filename,
                                                   ". Duration: ", elapsed.ToString("G", _FormatProvider))

        ComponentMetaData.FireInformation(10, ComponentMetaData.Name, finishedText, "", 0, False)
    End Sub

#End Region


#Region " Private Methods "

    ''' <summary>
    ''' Builds the _BufferTable table, defining one column for each field mapping in the MappingsRecordSet variable.
    ''' </summary>
    ''' <returns>
    ''' True when exactly MAPPING_COUNT mappings were found and a column was created for each of them;
    ''' otherwise False, in which case the problem has been reported to the package as an error.
    ''' </returns>
    Private Function BuildImportBuffer() As Boolean
        Dim mappingsTable As New DataTable("mappingsTable")
        Dim mappingsFound As Boolean = False

        Try
            ' Initialize a table to act as a buffer
            _BufferTable = New DataTable("_BufferTable")

            ' Read mappings from the MappingsRecordSet retrieved previously
            Using adapter As OleDbDataAdapter = New OleDbDataAdapter()
                adapter.Fill(mappingsTable, Variables.MappingsRecordSet)
            End Using

            ' See if we got the required number of mappings
            If mappingsTable.Rows.Count = MAPPING_COUNT Then
                For Each row As DataRow In mappingsTable.Rows
                    ' Get the values from the fields on each row
                    Dim columnName As String = row.Item("FileColumnName").ToString()
                    Dim columnMapToName As String = row.Item("MapToOutPut").ToString()
                    Dim columnMapToType As String = row.Item("OutputDataTypeName").ToString()
                    Dim isNullable As Boolean = CType(row.Item("IsNullable"), Boolean)

                    ' Resolve the data-type by name. GetType returns Nothing when the name cannot be resolved; report
                    ' that explicitly, rather than letting the column's constructor fail with a far less helpful message.
                    Dim columnType As System.Type = System.Type.GetType(columnMapToType)

                    If columnType Is Nothing Then
                        Dim typeMessage As String = "The data type [" & columnMapToType & "] specified in the field mapping for column [" & columnName &
                                                    "] could not be resolved. Verify that OutputDataTypeName holds a valid .Net type name, such as System.Double."
                        ComponentMetaData.FireError(10, ComponentMetaData.Name, typeMessage, "", 0, False)

                        ' The Finally block below still cleans up the mappings
                        Return False
                    End If

                    ' Create a new MappedDataColumn, declared in this class, which has one extra property named MapToColumn to
                    ' store the name of the target field in the destination. Initially, this column has the name as declared in
                    ' the file being imported. After reading the file, the column will be renamed to its mapped name, which is
                    ' the same as the name in the destination table so its contents can be matched in method CreateNewOutputRows.
                    _BufferTable.Columns.Add(New MappedDataColumn(columnName, columnMapToName, columnType, isNullable))
                Next row

                mappingsFound = True
            Else
                ' Report fatal problem to the package, failing the job running the package containing this script
                Dim messageText As String = "Failed to retrieve the required number of field mappings. Required are " & MAPPING_COUNT.ToString() &
                                            " mappings; found " & mappingsTable.Rows.Count.ToString() & " mappings."
                ComponentMetaData.FireError(10, ComponentMetaData.Name, messageText, "", 0, False)
            End If

        Catch ex As Exception
            ' Report exception to the package
            ComponentMetaData.FireError(10, ComponentMetaData.Name, ex.Message, "", 0, False)
        Finally
            ' Clear the mappings; no longer needed
            mappingsTable.Clear()
            mappingsTable.Dispose()
        End Try

        Return mappingsFound
    End Function

#End Region


#Region " Nested Class MappedDataColumn "

    ''' <summary>
    ''' A DataColumn that additionally carries the name
    ''' of the output column its values are mapped to.
    ''' </summary>
    Private Class MappedDataColumn
        Inherits DataColumn

        ' Backing store for the MapToColumn property
        Private _mapToColumn As String

        ''' <summary>
        ''' Initializes a new instance of the MappedDataColumn class with
        ''' the specified column name, mapping, data-type and nullability.
        ''' </summary>
        ''' <param name="columnName">The name of the column to be created.</param>
        ''' <param name="mapToName">The name of the output column to map to.</param>
        ''' <param name="dataType">The data-type of the values stored in the column.</param>
        ''' <param name="allowNull">True to allow nulls; otherwise, false.</param>
        Public Sub New(ByVal columnName As String, ByVal mapToName As String, ByVal dataType As System.Type, ByVal allowNull As Boolean)
            MyBase.New(columnName, dataType)

            Me.Unique = False
            Me.AllowDBNull = allowNull
            _mapToColumn = mapToName
        End Sub

        ''' <summary>
        ''' Gets or sets the name of the output column
        ''' downstream to map the values in this column to.
        ''' </summary>
        Public Property MapToColumn As String
            Get
                Return _mapToColumn
            End Get
            Set(ByVal value As String)
                _mapToColumn = value
            End Set
        End Property

    End Class

#End Region


End Class
